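# Extract the text of PowerPoint presentations into Word documents
# (the first and the last slide of each presentation are skipped).
# List the presentations to process in pptPaths; for each one a .docx file
# with the same base name is created in the current working directory.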


import sys
from pathlib import Path

import docx
import pptx

# Presentations to convert; the script writes one .docx per entry, named
# after the presentation file.
pptPaths = [
    '美育/警匪片备课.pptx',
    '美育/武侠电影.pptx',
    '美育/战争片鉴赏.pptx',
]
# Text blocks already written to the output; pushDoc() consults this list to
# avoid adding the same block twice.
pageTem = list()


def pushDoc(text):
    """Add ``text`` to the current Word document unless it is already there.

    The paragraph is written to the module-level ``doc`` and the texts
    written so far are remembered in the module-level ``pageTem`` list.

    python-pptx reports a soft line break inside a paragraph as a vertical
    tab, a control character that cannot be stored in the XML of a .docx
    file. Vertical tabs are therefore converted to newlines and any other
    control character except tab, newline and carriage return is removed,
    instead of losing the whole text block to a swallowed exception.

    Args:
        text: The text block to add as a single paragraph.
    """
    # Keep tab/newline/carriage return, turn "\v" into a line break and drop
    # every other character below U+0020.
    cleaned = ''.join(
        '\n' if ch == '\v' else ch
        for ch in text
        if ch >= ' ' or ch in '\t\n\r\v'
    )
    if cleaned in pageTem:
        return
    try:
        doc.add_paragraph(cleaned)
    except ValueError as err:
        # Still best effort (e.g. lone surrogates are rejected as well), but
        # the skipped block is reported instead of vanishing silently.
        print('pushDoc: skipped a text block that could not be written:',
              err, file=sys.stderr)
        return
    pageTem.append(cleaned)


def getText(slide):
    """Collect the distinct texts of one slide and pass them to pushDoc().

    Every shape that has a text frame contributes its text once, in shape
    order. The pieces are concatenated with a newline in front of each one
    and the result is forwarded as a single block. Nothing is forwarded
    when the slide has no shape with a text frame.

    Args:
        slide: A slide object exposing ``shapes``.
    """
    seen = []
    for shp in slide.shapes:
        if not shp.has_text_frame:
            continue
        content = shp.text_frame.text
        if content in seen:
            # The same text already appeared on this slide.
            continue
        seen.append(content)
    merged = ''.join('\n' + piece for piece in seen)
    if merged:
        pushDoc(merged)


def delPage(prs, index):
    """Remove the slide at position ``index`` from the slide-id list of ``prs``.

    Args:
        prs: Presentation whose ``slides._sldIdLst`` is modified in place.
        index: Position of the slide to remove; negative values count from
            the end, as with ordinary list indexing.
    """
    id_list = prs.slides._sldIdLst
    # Index into a snapshot of the entries, then remove the chosen entry
    # from the live list.
    target = list(id_list)[index]
    id_list.remove(target)


if __name__ == '__main__':
    # Convert every listed presentation into its own Word document.
    for pptPath in pptPaths:
        # pushDoc() writes into the module-level ``doc``, so a fresh document
        # is bound to that name for each presentation.
        doc = docx.Document()
        # Start the duplicate tracking from scratch; otherwise text that was
        # already written for an earlier presentation would be left out of
        # this document.
        pageTem.clear()
        ppt = pptx.Presentation(pptPath)
        # ``stem`` removes only the final extension, so dots inside the file
        # name are kept.
        name = Path(pptPath).stem
        # Skip the first and the last slide. Slicing a snapshot leaves the
        # presentation untouched and simply yields nothing when there are
        # fewer than three slides, instead of raising IndexError.
        for slide in list(ppt.slides)[1:-1]:
            getText(slide)
        doc.save(name + ".docx")
